Tasks complete 6/6
## Show the directory this report is being run from
print(getwd())
## [1] "/Users/hunterdarr/Documents/RPACKAGES/FALL224753darr0006/LAB8"
## Draw 10 values from Uniform(0, 5) and display them
labSample <- runif(n = 10, min = 0, max = 5)
print(labSample)
## [1] 4.0019253 4.5725337 0.2698062 2.4463107 0.7350226 3.4738943 2.3056199
## [8] 3.3662620 2.6770966 1.7441278
Mean = \((0 + 5)/2\) = 2.5. Variance = \((5-0)^2/12\) ≈ 2.08333333333.
## Sample mean of the 10 draws
print(mean(labSample))
## [1] 2.55926
## Sample variance of the 10 draws
print(var(labSample))
## [1] 1.883024
The mean and variance of the sample I am currently looking at are 3.135563 and 2.212281, respectively.
These numbers are fairly far from the theoretical mean and variance. I believe this is because we have a small sample size.
The sample I am using is: [1] 1.7273554 4.0384836 2.4628431 4.7507547 1.8641004 0.3166395 4.2169844 3.2129434 [9] 4.7000881 4.0654404
T = the sum of Y’s = \(1.7273554 + 4.0384836 + 2.4628431 + 4.7507547 + 1.8641004 + 0.3166395 + 4.2169844 + 3.2129434 + 4.7000881 + 4.0654404\) = 31.35563
mean of Y = T/n = 31.35563/10 = 3.135563
Line A draws a random sample of size n*iter from a uniform distribution with a lower limit of 0 and an upper limit of 5.
Line B arranges the values drawn in line A into a matrix with n rows and iter columns, so each column holds one sample of size n.
Line C applies sum to each column of the matrix and stores the resulting vector of sample totals (one per column) in sm.
Line D calls upon our function with n = 10 and iter = 10000.
## Simulate the distribution of the sample total for Uniform(0, 5) data.
##
## Arguments:
##   n    - number of observations in each sample
##   iter - number of samples to simulate
##
## Draws a histogram of the simulated totals and returns them as a numeric
## vector (one total per column of the sample matrix).
myclt <- function(n, iter) {
  draws <- runif(n * iter, min = 0, max = 5) # A
  samples <- matrix(draws, nrow = n, ncol = iter, byrow = TRUE) # B
  ## The name `sm` is kept on purpose: hist() builds its default title and
  ## x-axis label from the expression it is given.
  sm <- apply(samples, MARGIN = 2, FUN = sum) # C
  hist(sm)
  sm
}
w <- myclt(n = 10, iter = 10000) #D
## Simulate the distribution of the sample mean for Uniform(0, 5) data.
##
## Arguments:
##   n    - number of observations in each sample
##   iter - number of samples to simulate
##
## Draws a histogram of the simulated means and returns them as a numeric
## vector (one mean per column of the sample matrix).
mycltChanged <- function(n, iter) {
  draws <- runif(n * iter, min = 0, max = 5) # A
  samples <- matrix(draws, nrow = n, ncol = iter, byrow = TRUE) # B
  ## The name `sm` is kept on purpose: hist() builds its default title and
  ## x-axis label from the expression it is given.
  sm <- apply(samples, MARGIN = 2, FUN = mean) # C
  hist(sm)
  sm
}
w <- mycltChanged(n = 10, iter = 10000) #D
## Simulate sample means from Uniform(a, b) and compare them with the
## normal curve suggested by the central limit theorem.
##
## Arguments:
##   n    - number of observations in each sample
##   iter - number of samples to simulate
##   a, b - lower and upper limits of the uniform distribution; these have
##          defaults (0 and 10) that can be overridden in the call
##
## Draws a density histogram of the sample means and overlays the estimated
## density (blue), the theoretical normal curve (red, dashed) and the
## Uniform(a, b) density the data came from (black). The value of the last
## curve() call is returned invisibly.
mycltu <- function(n, iter, a = 0, b = 10) {
  ## One long vector holding all n * iter uniform draws
  draws <- runif(n * iter, min = a, max = b)
  ## Reshape so that each column is one sample of n observations
  samples <- matrix(draws, nrow = n, ncol = iter, byrow = TRUE)
  ## Mean of every column (margin 2)
  xbar <- apply(samples, MARGIN = 2, FUN = mean)
  ## Bin the means without plotting, only to learn the tallest bar's density
  bins <- hist(xbar, plot = FALSE)
  ## Leave 10% of headroom above the tallest bar
  y_top <- 1.1 * max(bins$density)
  plot_title <- paste0("Histogram of sample mean", "\n", "sample size= ", n)
  ## Density histogram of the sample means
  hist(xbar, freq = FALSE, ylim = c(0, y_top), main = plot_title,
       xlab = "Sample mean")
  ## Kernel density estimate of the simulated means
  lines(density(xbar), col = "Blue", lwd = 3)
  ## Theoretical normal curve: mean (a + b) / 2, sd (b - a) / sqrt(12 * n)
  curve(dnorm(x, mean = (a + b) / 2, sd = (b - a) / (sqrt(12 * n))),
        add = TRUE, col = "Red", lty = 2, lwd = 3)
  ## Density of the distribution the observations were drawn from
  curve(dunif(x, a, b), add = TRUE, lwd = 4)
}
#mycltu(n=20,iter=100000)
The second argument of apply specifies whether to apply the function to the rows (1) or the columns (2). The 2 specifies that we will take the mean of the columns. This makes sense because the columns are our iterations. For example, if we have a sample size of 10 and 20 iterations, each iteration (column) will have 10 numbers. We want the mean of each iteration, so we take the mean of each column (which is margin 2).
w will have 100000 terms. This is because w stores one mean per iteration (column) and we have 100000 iterations.
We can prove this with the following R code:
## Build the samples by hand for n = 20 and iter = 100000
yExample <- runif(20 * 100000, min = 0, max = 5)
## Each column is one iteration; each row is one observation within a sample
dataExample <- matrix(yExample, nrow = 20, ncol = 100000, byrow = TRUE)
## One mean per column (margin 2), collected in a vector
wExample <- apply(dataExample, MARGIN = 2, FUN = mean)
## Number of means produced
print(length(wExample))
## [1] 100000
This plots the theoretical normal curve using dnorm.
The standard deviation is (b-a)/(sqrt(12*n)) because ((b-a)^2)/12 is the variance of the uniform distribution. The variance of the sample mean is that variance divided by the sample size n, i.e. ((b-a)^2)/(12*n), and taking the square root of this gives the standard deviation (b-a)/sqrt(12*n).
## Repeat the Uniform(0, 10) simulation for increasing sample sizes
invisible(lapply(c(1, 2, 3, 5, 10, 30), function(size) {
  mycltu(n = size, iter = 10000, a = 0, b = 10)
}))
I can conclude that if the sample size is large enough, the distribution of the sample mean is accurately described by the theoretical normal curve. We can see that once n=10 it looks nearly the same as n=30. However, n=1 does not look the same and n=2, while more similar than n=1, does not look similar to n=30.
## Simulate sample means of binomial data and compare them with the
## theoretical normal curve.
##
## Arguments:
##   n    - number of observations in each sample; also used as the `size`
##          (number of trials) of every binomial draw
##   iter - number of samples to simulate
##   p    - success probability of the binomial distribution
##   ...  - further arguments passed on to hist()
##
## Draws a density histogram of the sample means with the normal curve
## overlaid (red, dashed). The value of curve() is returned invisibly.
mycltb <- function(n, iter, p = 0.5, ...) {
  ## All n * iter draws, each from Binomial(size = n, prob = p)
  draws <- rbinom(n * iter, size = n, prob = p)
  ## Reshape so that each column is one sample of n observations
  samples <- matrix(draws, nrow = n, ncol = iter, byrow = TRUE)
  ## Mean of every column (margin 2)
  xbar <- apply(samples, MARGIN = 2, FUN = mean)
  ## Bin the means without plotting, only to learn the tallest bar's density
  bins <- hist(xbar, plot = FALSE)
  ## Leave 10% of headroom above the tallest bar
  y_top <- 1.1 * max(bins$density)
  plot_title <- paste0("Histogram of sample mean", "\n", "sample size= ", n)
  ## freq = FALSE gives a density histogram rather than counts
  hist(xbar, freq = FALSE, ylim = c(0, y_top), main = plot_title,
       xlab = "Sample mean", ...)
  ## Empirical density overlay, currently switched off:
  # lines(density(xbar), col = "Blue", lwd = 3)
  ## Theoretical normal curve. Each draw has mean n * p and variance
  ## n * p * (1 - p); averaging n of them divides the variance by n.
  curve(dnorm(x, mean = n * p, sd = sqrt(p * (1 - p))),
        add = TRUE, col = "Red", lty = 2, lwd = 3)
}
#mycltb(n=5,iter=10000,p=0.5)
## For each success probability, repeat the binomial simulation at
## increasing sample sizes
invisible(lapply(c(0.3, 0.7, 0.5), function(prob) {
  lapply(c(4, 5, 10, 20), function(size) {
    mycltb(n = size, iter = 10000, p = prob)
  })
}))
My conclusion is similar to task 3 in the sense that if you have a large enough sample, it will accurately represent a larger sample.
## Simulate sample means of Poisson data and compare them with the
## theoretical normal curve.
##
## Arguments:
##   n      - number of observations in each sample
##   iter   - number of samples to simulate
##   lambda - rate of the Poisson distribution
##   ...    - further arguments passed on to hist()
##
## Draws three panels on one page: a density histogram of the sample means
## with the normal curve overlaid (top), a barplot of the relative
## frequencies of the simulated values (bottom left) and the Poisson
## probability function (bottom right). The single-panel layout is restored
## when the function exits. Returns NULL invisibly.
mycltp <- function(n, iter, lambda = 10, ...) {
  ## All n * iter Poisson draws
  y <- rpois(n * iter, lambda = lambda)
  ## Reshape so that each column is one sample of n observations
  data <- matrix(y, nrow = n, ncol = iter, byrow = TRUE)
  ## Mean of every column (margin 2)
  w <- apply(data, MARGIN = 2, FUN = mean)
  ## Bin the means without plotting, only to learn the tallest bar's density
  param <- hist(w, plot = FALSE)
  ## Leave 10% of headroom above the tallest bar
  ymax <- 1.1 * max(param$density)

  ## Three-panel page: histogram across the top, two plots underneath.
  ## Put the single-panel layout back on exit so the split does not leak
  ## into plots drawn after this function returns.
  on.exit(layout(1), add = TRUE)
  layout(matrix(c(1, 1, 2, 3), nrow = 2, ncol = 2, byrow = TRUE))

  ## Density histogram of the sample means
  hist(w, freq = FALSE, ylim = c(0, ymax), col = rainbow(max(w)),
       main = paste0("Histogram of sample mean", "\n", "sample size= ", n,
                     " iter=", iter, " lambda=", lambda),
       xlab = "Sample mean", ...)
  ## Empirical density overlay, currently switched off:
  # lines(density(w), col = "Blue", lwd = 3)
  ## Theoretical normal curve: mean lambda, sd sqrt(lambda / n)
  curve(dnorm(x, mean = lambda, sd = sqrt(lambda / n)),
        add = TRUE, col = "Red", lty = 2, lwd = 3)

  ## One colour per possible value 0..max(y), shared by both lower panels
  ## so that a given value is drawn in the same colour in each of them.
  value_cols <- rainbow(max(y) + 1)

  ## y is discrete, so show its relative frequencies as a barplot
  rel_freq <- table(y) / (n * iter)
  barplot(rel_freq, col = value_cols[as.integer(names(rel_freq)) + 1],
          main = "Barplot of sampled y", ylab = "Rel. Freq", xlab = "y")

  ## Poisson probability function over the observed range of values
  x <- 0:max(y)
  plot(x, dpois(x, lambda = lambda), type = "h", lwd = 5, col = value_cols,
       main = "Probability function for Poisson", ylab = "Probability",
       xlab = "y")
  invisible(NULL)
}
#mycltp(n=10,iter=10000)
## For each rate, repeat the Poisson simulation at increasing sample sizes
invisible(lapply(c(4, 10), function(rate) {
  lapply(c(3, 5, 10, 20), function(size) {
    mycltp(n = size, iter = 10000, lambda = rate)
  })
}))
I picked the mycltu function.
## Call the packaged copy of mycltu; the result is stored, not printed
dontShowOutput <- FALL224753darr0006::mycltu(n = 54, iter = 100000)